# required packages - install first
require(estimatr)
require(tidyverse)
require(rdrobust)
require(rddensity)



###################################################################
###################################################################
#
#    When the state becomes complicit: mayors, criminal actors, 
#   and the deliberate weakening of the local state in Colombia 
#
#                   supplementary information
#
#                    Camilo Nieto-Matiz
#                 camilo.nieto-matiz@utsa.edu
###################################################################
###################################################################

 
# load dataframe
load("crimcollusion_colombia_main.rda")



################################################################
#         APPENDIX D. Testing model assumptions
################################################################


#================================================
#                  Figure D.1
#              Covariate balance
#================================================

# local linear specification
summary(bt_1<-rdrobust(y = log(cc_data$h_0407+1), x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri",covs=NULL, all = T))
summary(bt0<-rdrobust(y = cc_data$tx_0407, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri",covs=NULL, all = T))
summary(bt1<-rdrobust(y = cc_data$altura, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri",covs=NULL, all = T))
summary(bt2<-rdrobust(y = cc_data$areaoficialkm2, x = cc_data$share1, c = 0, p=1, vce = "nn",
                      kernel="tri", covs=NULL, all = T))
summary(bt3<-rdrobust(y = cc_data$disbogota, x = cc_data$share1, c = 0, p=1, vce = "nn",
                      kernel="tri",covs=NULL, all = T))
summary(bt4<-rdrobust(y = cc_data$dismdo, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri",covs=NULL, all = T))
summary(bt5<-rdrobust(y = log(cc_data$pib_percapita+1), x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri", covs=NULL, all = T))
summary(bt6<-rdrobust(y = log(cc_data$desemp_fisc+1), x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri", covs=NULL, all = T))
summary(bt7<-rdrobust(y = log(cc_data$y_transf_nal+1), x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri", covs=NULL, all = T))
summary(bt8<-rdrobust(y = cc_data$popdens, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri", covs=NULL, all = T))
summary(bt9<-rdrobust(y = cc_data$aptitud, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                      kernel="tri", covs=NULL, all = T))
summary(bt10<-rdrobust(y = cc_data$conflicto, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))
summary(bt11<-rdrobust(y = cc_data$Violencia_48_a_53, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))
summary(bt12<-rdrobust(y = cc_data$para_0407, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))
summary(bt13<-rdrobust(y = cc_data$guer_0407, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))
summary(bt14<-rdrobust(y = cc_data$police, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))
summary(bt15<-rdrobust(y = cc_data$coca, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))
summary(bt16<-rdrobust(y = cc_data$courts, x = cc_data$share1, c = 0, p=1, vce = "nn", 
                       kernel="tri", covs=NULL, all = T))


#ols - full sample
cc_data$treat <-ifelse(cc_data$share1>0,1,0)
summary(bt_1.full<-lm_robust(log(cc_data$h_0407+1)~share1*treat, data=cc_data))
summary(bt0.full<-lm_robust(tx_0407~share1*treat, data=cc_data))
summary(bt1.full<-lm_robust(altura~share1*treat, data=cc_data))
summary(bt2.full<-lm_robust(areaoficialkm2~share1*treat, data=cc_data))
summary(bt3.full<-lm_robust(disbogota~share1*treat, data=cc_data))
summary(bt4.full<-lm_robust(dismdo~share1*treat, data=cc_data))
summary(bt5.full<-lm_robust(log(cc_data$pib_percapita+1)~share1*treat, data=cc_data))
summary(bt6.full<-lm_robust(log(cc_data$desemp_fisc+1)~share1*treat, data=cc_data))
summary(bt7.full<-lm_robust(log(cc_data$y_transf_nal+1)~share1*treat, data=cc_data))
summary(bt8.full<-lm_robust(popdens~share1*treat, data=cc_data))
summary(bt9.full<-lm_robust(aptitud~share1*treat, data=cc_data))
summary(bt10.full<-lm_robust(conflicto~share1*treat, data=cc_data))
summary(bt11.full<-lm_robust(Violencia_48_a_53~share1*treat, data=cc_data))
summary(bt12.full<-lm_robust(para_0407~share1*treat, data=cc_data))
summary(bt13.full<-lm_robust(guer_0407~share1*treat, data=cc_data))
summary(bt14.full<-lm_robust(police~share1*treat, data=cc_data))
summary(bt15.full<-lm_robust(coca~share1*treat, data=cc_data))
summary(bt16.full<-lm_robust(courts~share1*treat, data=cc_data))



#local linear
balance <- data.frame(Var =    c('Homicide rates 04-07', 'Property taxation 04-07', 'Altitude','Area km2', 'Distance to Bogotá','Distance to market',
                                 'Per capita GDP','Fiscal performance','National transfers', 'Population density',
                                 'Soil quality', 'Land conflicts 1901-31', 'Violence 1948-43', 'Paramilitary actions',
                                 'Guerrilla actions', 'Police station', 'Coca', 'Courts'),
                      Estimate =   c(bt_1$coef[3],bt0$coef[3],bt1$coef[3],bt2$coef[3],bt3$coef[3],bt4$coef[3],bt5$coef[3],bt6$coef[3],bt7$coef[3],
                                     bt8$coef[3],bt9$coef[3],bt10$coef[3],bt11$coef[3],bt12$coef[3],bt13$coef[3],bt14$coef[3],
                                     bt15$coef[3],bt16$coef[3]),
                      pvalue =   c(bt_1$pv[3],bt0$pv[3],bt1$pv[3],bt2$pv[3],bt3$pv[3],bt4$pv[3],bt5$pv[3],bt6$pv[3],bt7$pv[3],bt8$pv[3],bt9$pv[3],
                                   bt10$pv[3],bt11$pv[3],bt12$pv[3],bt13$pv[3],bt14$pv[3],bt15$pv[3],bt16$pv[3]),
                      SE =  c(bt_1$se[3],bt0$se[3],bt1$se[3],bt2$se[3],bt3$se[3],bt4$se[3],bt5$se[3],bt6$se[3],bt7$se[3],bt8$se[3],bt9$se[3],
                              bt10$se[3],bt11$se[3],bt12$se[3],bt13$se[3],bt14$se[3], bt15$se[3], bt16$se[3]))
balance$Var <- factor(balance$Var, ordered(balance$Var))
balance$Specification <- "Local"

# ols
balance1 <- data.frame(Var =    c('Homicide rates 04-07', 'Property taxation 04-07', 'Altitude','Area km2', 'Distance to Bogotá','Distance to market',
                                  'Per capita GDP','Fiscal performance','National transfers', 'Population density',
                                  'Soil quality', 'Land conflicts 1901-31', 'Violence 1948-43', 'Paramilitary actions',
                                  'Guerrilla actions', 'Police station', 'Coca', 'Courts'),
                       Estimate =  c(bt_1.full$coefficients[3],bt0.full$coefficients[3],bt1.full$coefficients[3],bt2.full$coefficients[3],bt3.full$coefficients[3],bt4.full$coefficients[3],bt5.full$coefficients[3],bt6.full$coefficients[3],bt7.full$coefficients[3],
                                     bt8.full$coefficients[3],bt9.full$coefficients[3],bt10.full$coefficients[3],bt11.full$coefficients[3],bt12.full$coefficients[3],bt13.full$coefficients[3],bt14.full$coefficients[3],
                                     bt15.full$coefficients[3],bt16.full$coefficients[3]),
                       pvalue =   c(bt_1.full$p.value[3],bt0.full$p.value[3],bt1.full$p.value[3],bt2.full$p.value[3],bt3.full$p.value[3],bt4.full$p.value[3],bt5.full$p.value[3],bt6.full$p.value[3],bt7.full$p.value[3],
                                    bt8.full$p.value[3],bt9.full$p.value[3],bt10.full$p.value[3],bt11.full$p.value[3],bt12.full$p.value[3],bt13.full$p.value[3],bt14.full$p.value[3],
                                    bt15.full$p.value[3],bt16.full$p.value[3]),
                       SE =   c(bt_1.full$std.error[3],bt0.full$std.error[3],bt1.full$std.error[3],bt2.full$std.error[3],bt3.full$std.error[3],bt4.full$std.error[3],bt5.full$std.error[3],bt6.full$std.error[3],bt7.full$std.error[3],
                                bt8.full$std.error[3],bt9.full$std.error[3],bt10.full$std.error[3],bt11.full$std.error[3],bt12.full$std.error[3],bt13.full$std.error[3],bt14.full$std.error[3],
                                bt15.full$std.error[3],bt16.full$std.error[3]))
balance1$Var <- factor(balance1$Var, ordered(balance$Var))
balance1$Specification <- "Full"

# bind frames
balance.data <- rbind(balance,balance1)


# plot 
ggplot(balance.data)+ 
  geom_point(aes(x = pvalue, y=Var, color=Specification, shape=Specification), 
             lwd = 2, position = position_dodge(width = 0.5))+
  geom_vline(xintercept = 0.05, colour = "red", lty = 2, lwd=0.3)+
  scale_color_manual(values=c("darkorange", "olivedrab"))+
  xlim(0,1)+
  ylab('')+xlab('p-value')+theme_bw()+
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(hjust = 0),
        axis.text.y = element_text(hjust = 0),
        legend.position="bottom")
ggsave("balance test.png", width = 5, height = 5)

#================================================
#                  Figure D.2-a
#              McCrary's density test
#================================================

rdd <- rddensity(X = cc_data$share1, all = T, fitselect = "unrestricted", 
                 vce="jackknife");summary(rdd)
rdplotdensity(rdd, cc_data$share1)
ggsave("density test.png", width = 5, height = 4)


#================================================
#                  Figure D.2-b
#        Histogram of the forcing variable
#================================================

ggplot(cc_data, aes(share1))+
  geom_histogram(bins=30, alpha=.7)+
  geom_vline(xintercept = 0, color="red", size=.2)+
  labs(x="Vote margin", y="")+theme_bw()
ggsave("mv_hist.png", width = 5, height = 4)




#================================================
#                  Figure D.2-b
#                 Placebo - cutoffs
#================================================

cutoff <- sort(c(seq(-.3,.3, by=.04),0))

# matrix to store taxes estimates
store.t <- as.data.frame(matrix(NA,17,3))
colnames(store.t)<-c('coef','se','pv')

# Initiate loop for taxes #
for (i in 1:length(cutoff)) {
  # Estimate model
  summary(tx_loop <- rdrobust(cc_data$tx_0811, x = cc_data$share1, c = cutoff[i], p=1, vce = "nn", 
                              bwselect = "mserd", kernel="tri",  all = T))
  # Extract 3rd row of rd_out() LC function (robust RDD estimate)
  # Place into loop holder
  store.t[[i,1]] <- tx_loop$coef[3]
  store.t[[i,2]] <- tx_loop$se[3]
  store.t[[i,3]] <- tx_loop$pv[3]
  store.t$c <- cutoff
  store.t$zero<-ifelse(store.t$c==0,1,0)
  
}

 
# plot placebos
store.t %>%
  ggplot()+ 
  geom_errorbar(aes(x = factor(c), ymin = coef - se*1.96,
                    ymax = coef + se*1.96, color=factor(zero), lwd=factor(zero)), width=.15)+
  scale_color_manual(values=c("gray50", "blue"))+
  scale_size_manual(values=c(.4,.7))+ylim(-200,300)+
  geom_point(aes(x = factor(c), y = coef), lwd = 1.5, shape = 20)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  xlab('') + ylab('') + theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),legend.position = "none",
        axis.text.x = element_text( size=7))
ggsave("mv_hist.png", width = 6, height = 4)





################################################################
#      APPENDIX E. RD estimates with covariate adjustment
################################################################

# covariates
covs <- cbind(log(cc_data$popdens+1),  
              log(cc_data$coca_hec+1),  log(cc_data$aptitud+1))

covs1 <- cbind(log(cc_data$popdens+1),  
               log(cc_data$coca_hec+1),  log(cc_data$aptitud+1),
               log(cc_data$cad_lag+1),
               log(cc_data$jud_lag+1))


#================================================
#                  Table E.1
#                 RD estimates
#================================================

summary(rdrobust(cc_data$tx_0811,cc_data$share1,c=0,p=1,  
                 all = T, vce = "nn", kernel="tri",covs=covs))

summary(rdrobust(cc_data$tx_0811,cc_data$share1,c=0,p=2,  
                 all = T, vce = "nn", kernel="tri",covs=covs))




#================================================
#                  Table E.2
#         Mechanisms - with covariate adjustment
#================================================

summary(rdrobust(cc_data$cad_0809,cc_data$share1,c=0,p=1,  
                 all = T, vce = "nn", kernel="tri",covs=covs))

summary(rdrobust(cc_data$val_0809,cc_data$share1,c=0,p=1,  
                 all = T, vce = "nn", kernel="tri",covs=covs))

summary(rdrobust(log(cc_data$inf_0809+1),cc_data$share1,c=0,p=1,  
                 all = T, vce = "nn", kernel="tri",covs=covs))

summary(tx0<-rdrobust(cc_data$just_0809,cc_data$share1,c=0,p=1,  
                      all = T, vce = "nn", kernel="tri",covs=covs))



#================================================
#                  Table E.3
#       RD estimates with additional covariates
#================================================

summary(rdrobust(cc_data$tx_0811,cc_data$share1,c=0,p=1,  
                 all = T, vce = "nn", kernel="tri",covs=covs1))

summary(rdrobust(cc_data$tx_0811,cc_data$share1,c=0,p=2,  
                 all = T, vce = "nn", kernel="tri",covs=covs1))




################################################################
#      APPENDIX F. Placebo Test: Preexisting Trends
################################################################

#================================================
#                Figure F.1-a
#          Different bandwidths - 04-07
#================================================


# matrix to store homicide estimates
store_bw2 <- as.data.frame(matrix(NA,34,3))

# Initiate loop for taxes 08-11
bandw <- sort(c(seq(.04,.2,by=.005), 0.085)) #calonico's optimal bw: 
for (i in 1:length(bandw)) {
  # Estimate model
  summary(bw_loop2 <- rdrobust(y=cc_data$tx_0407, x = cc_data$share1, c =0, p=1,h=bandw[i],vce = "nn", 
                               kernel="tri", covs=covs, all = T)) 
  # Extract 3rd row of rd_out() LC function (robust RDD estimate)
  # Place into loop holder
  store_bw2[[i,1]] <- bw_loop2$coef[3]
  store_bw2[[i,2]] <- bw_loop2$se[3]
  store_bw2[[i,3]] <- bw_loop2$pv[3]
  store_bw2$c <- bandw
  store_bw2$c<-round(store_bw2$c, digits = 4)
}
colnames(store_bw2)<-c('coef','se','pv','c')
store_bw2$bw<-ifelse(store_bw2$c==.085,1,0)


# bandwidth plot
ggplot(store_bw2)+ 
  geom_errorbar(aes(x = factor(c), ymin = coef - se*1.96,
                    ymax = coef + se*1.96, color=factor(bw), lwd = factor(bw)), width=.4)+
  scale_color_manual(values=c("steelblue3", "blue"))+
  scale_size_manual(values=c(.4,.7))+
  geom_point(aes(x = factor(c), y = coef), lwd = 1.5, shape = 20)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  ylim(-90,50)+xlab('') + ylab('') + theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),legend.position = "none")+
  scale_x_discrete(breaks=seq(0.04, .2, by = .02))
ggsave("bw_placebo2_para_tx0407.png", height = 5, width = 6)





#================================================
#                Figure F.1-b
#          Different bandwidths - 04-07
#================================================

# matrix to store homicide estimates
store_bw3 <- as.data.frame(matrix(NA,34,3))

# Initiate loop for taxes 08-11
bandw <- sort(c(seq(.04,.2,by=.005), 0.098)) #calonico's optimal bw: 
for (i in 1:length(bandw)) {
  # Estimate model
  summary(bw_loop3 <- rdrobust(y=cc_data$tx_0407, x = cc_data$share1, c =0, p=1,h=bandw[i],vce = "nn", 
                               kernel="tri", covs=covs, all = T)) 
  # Extract 3rd row of rd_out() LC function (robust RDD estimate)
  # Place into loop holder
  store_bw3[[i,1]] <- bw_loop3$coef[3]
  store_bw3[[i,2]] <- bw_loop3$se[3]
  store_bw3[[i,3]] <- bw_loop3$pv[3]
  store_bw3$c <- bandw
  store_bw3$c<-round(store_bw3$c, digits = 4)
}
colnames(store_bw3)<-c('coef','se','pv','c')
store_bw3$bw<-ifelse(store_bw3$c==.098,1,0)


# bandwidth plot
ggplot(store_bw3)+ 
  geom_errorbar(aes(x = factor(c), ymin = coef - se*1.96,
                    ymax = coef + se*1.96, color=factor(bw), lwd = factor(bw)), width=.4)+
  scale_color_manual(values=c("steelblue3", "blue"))+
  scale_size_manual(values=c(.4,.7))+
  geom_point(aes(x = factor(c), y = coef), lwd = 1.5, shape = 20)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  ylim(-90,50)+xlab('') + ylab('') + theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),legend.position = "none")+
  scale_x_discrete(breaks=seq(0.04, .2, by = .02))
ggsave("bw_placebo2_para_tx0407_covs.png", height = 5, width = 6)




################################################################
#      APPENDIX G.   Effects over time 2012-2015
################################################################

#================================================
#                  Table G.1  
#        Effects over time 2012-15, RD estimates
#================================================

summary(rdrobust(cc_data$tx_1215,cc_data$share1,c=0,p=1,  
                     all = T, vce = "nn", kernel="tri",covs=NULL))

summary(rdrobust(cc_data$tx_1215,cc_data$share1,c=0,p=2,  
                 all = T, vce = "nn", kernel="tri",covs=NULL))



#================================================
#                  Figure G.1  
#       Property taxes, 2012-2015 - RD plot
#================================================

rd.data1 <- cc_data  
# make the binned averages - bins on the left
count <- 1
bin.size <- .02
binx.left <- vector(length=length(seq(-.2, 0, bin.size)))
biny.left <- vector(length=length(binx.left))
last <- -.02
for(j in seq(-2, 0, bin.size)) {
  biny.left[count] <- mean(rd.data1$tx_1215[rd.data1$share1 >= j-bin.size & rd.data1$share1 < j], na.rm=T)
  binx.left[count] <- (j+last)/2
  last <- j
  count <- count + 1
}

# bins on the tight
count <- 1
bin.size <- .02
binx.right <- vector(length=length(seq(0,.2, bin.size)))
biny.right <- vector(length=length(binx.right))
last <- 0.2
for(j in seq(0,2, bin.size)) {
  biny.right[count] <- mean(rd.data1$tx_1215[rd.data1$share1 >= j-bin.size & rd.data1$share1 < j], na.rm=T)
  binx.right[count] <- (j+last)/2
  last <- j
  count <- count + 1
}


temp1 <- data.frame(cbind(binx.left, biny.left))
temp2 <- data.frame(cbind(binx.right, biny.right))
colnames(temp2) <- c("binx.left", "biny.left")
temp <- rbind(temp1, temp2)

# select variables
rd.data1<- select(rd.data1, share1, tx_1215) %>% dplyr::rename(binx.left=share1, biny.left=tx_1215)

# create plot
ggplot(data=temp, aes(x=binx.left, y =  biny.left)) +
  geom_point(data=subset(temp, binx.left >= 0&binx.left <= 2),color="royalblue2",alpha = .4, size = 4.5) +
  geom_point(data=subset(temp, binx.left <= 0&binx.left >= -2),color="tomato2",alpha = .4, size = 4.5) +
  geom_vline(xintercept = 0) +
  geom_smooth(data=subset(rd.data1, binx.left >= 0&binx.left <= .5),method = "loess",formula = y ~ poly(x, 2), se = TRUE, size = 1.5, fill="blue", color = "gray10", span = 1, alpha = .4)  +
  geom_smooth(data=subset(rd.data1, binx.left <= 0.0&binx.left >= -2),method = "loess",formula = y ~ poly(x, 2),se = TRUE, size = 1.5, fill="red", color = "gray10", span = 1, alpha = .4)  +
  coord_cartesian(xlim = c(-.2, .2))+ylim(0,80)+
  labs(x="Margin of Vote", y="Property Taxes") +
  theme_bw()+
  theme(text = element_text(hjust = 0.5, family="Palatino", size=12))
ggsave("rdplot_tax1215.png", height = 5, width = 6)





################################################################
#      APPENDIX H. OLS: alternative empirical strategy
################################################################

#=================================================================
#                        Table H.1  
#          Effect of paramilitary mayor on taxation (OLS)
#=================================================================


summary(lm_robust(tx_0811~para_mayor+colonialstate+Violencia_48_a_53+conflicto+coca+altura+
                       guer_0407+para_0407+log(aptitud+1),
                     data=cc_data))

 
 
#=================================================================
#                        Table H.2  
#          Effect of paramilitary mayor on prop rights (OLS)
#=================================================================


summary(a<-lm_robust(cad_0809+1~para_mayor+colonialstate+Violencia_48_a_53+conflicto+coca+altura+
                    guer_0407+para_0407+log(aptitud+1),
                  data=cc_data))

summary(b<-lm_robust(inf_0809+1~para_mayor+colonialstate+Violencia_48_a_53+conflicto+coca+altura+
                    guer_0407+para_0407+log(aptitud+1),
                  data=cc_data))

summary(c<-lm_robust(val_0809+1~para_mayor+colonialstate+Violencia_48_a_53+conflicto+coca+altura+
                    guer_0407+para_0407+log(aptitud+1),
                  data=cc_data))

 
